Skip to content

Commit dc8121f

Browse files
authored
Add Base32 decode fastpath (#36)
* Add fast path to base32 decode for data without null characters
* Add benchmark Base32.decodeIgnoreNullCharacters
* Add options to String.base32decoded
* Structify errors
* Add encode/decode random buffers test
1 parent c270645 commit dc8121f

File tree

4 files changed

+197
-23
lines changed

4 files changed

+197
-23
lines changed

Benchmarks/Benchmarks/BaseN/BaseN.swift

+11
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ let benchmarks = {
3434
}
3535
}
3636

37+
// Benchmarks `Base32.decode(string:options:)` with the `.allowNullCharacters`
// option on the Base32 encoding of the 256 byte values 0...255.
Benchmark("Base32.decodeIgnoreNullCharacters") { benchmark in
38+
let bytes = Array(UInt8(0) ... UInt8(255))
39+
// The input is encoded before `startMeasurement()`, so only decoding is timed.
let base32 = Base32.encodeToString(bytes: bytes)
40+
41+
benchmark.startMeasurement()
42+
43+
for _ in benchmark.scaledIterations {
44+
// The result is handed to `blackHole` rather than being discarded directly.
try blackHole(Base32.decode(string: base32, options: .allowNullCharacters))
45+
}
46+
}
47+
3748
Benchmark("Base64.encode") { benchmark in
3849
let bytes = Array(UInt8(0) ... UInt8(255))
3950

Sources/ExtrasBase64/Base32.swift

+118-16
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ public extension String {
66
self = Base32.encodeToString(bytes: bytes, options: options)
77
}
88

9-
/// Decode base32 encoded strin
10-
func base32decoded() throws -> [UInt8] {
11-
try Base32.decode(string: self)
9+
/// Decode base32 encoded string
10+
func base32decoded(options: Base32.DecodingOptions = []) throws -> [UInt8] {
11+
try Base32.decode(string: self, options: options)
1212
}
1313
}
1414

@@ -22,8 +22,25 @@ public enum Base32 {
2222
public static let omitPaddingCharacter = EncodingOptions(rawValue: UInt(1 << 0))
2323
}
2424

25-
public enum DecodingError: Swift.Error, Equatable {
26-
case invalidCharacter(UInt8)
25+
/// Decoding options
///
/// An option set passed to the `Base32.decode` functions and to
/// `String.base32decoded(options:)`; the default there is the empty set.
26+
public struct DecodingOptions: OptionSet {
27+
public let rawValue: UInt
28+
public init(rawValue: UInt) { self.rawValue = rawValue }
29+
30+
/// Selects the general decoder, which is documented as handling null
/// characters and padding. Without this option the decode functions use the
/// strict fast-path decoder, which assumes the input has no null characters.
public static let allowNullCharacters = DecodingOptions(rawValue: UInt(1 << 0))
31+
}
32+
33+
/// Error thrown when Base32 decoding fails.
///
/// The error is a struct wrapping a non-public enum; callers compare it with
/// the public static members (for example `.invalidCharacter`) via `Equatable`.
public struct DecodingError: Swift.Error, Equatable {
34+
// Backing storage for the error kind; not part of the public API.
enum _Internal {
35+
case invalidCharacter
36+
}
37+
38+
fileprivate let value: _Internal
39+
init(_ value: _Internal) {
40+
self.value = value
41+
}
42+
43+
/// The input contained a byte that is not valid at that position in
/// Base32 data. The offending byte is not carried by the error.
public static var invalidCharacter: Self { .init(.invalidCharacter) }
2744
}
2845

2946
/// Base32 Encode a buffer to an array of bytes
@@ -70,7 +87,10 @@ public enum Base32 {
7087
}
7188

7289
/// Base32 decode string
73-
public static func decode(string encoded: String) throws -> [UInt8] {
90+
public static func decode(
91+
string encoded: String,
92+
options: DecodingOptions = []
93+
) throws -> [UInt8] {
7494
let decoded = try encoded.utf8.withContiguousStorageIfAvailable { characterPointer -> [UInt8] in
7595
guard characterPointer.count > 0 else {
7696
return []
@@ -80,7 +100,11 @@ public enum Base32 {
80100

81101
return try characterPointer.withMemoryRebound(to: UInt8.self) { input -> [UInt8] in
82102
try [UInt8](unsafeUninitializedCapacity: capacity) { output, length in
83-
length = try Self._decode(from: input, into: output)
103+
if options.contains(.allowNullCharacters) {
104+
length = try Self._decode(from: input[...], into: output[...])
105+
} else {
106+
length = try Self._strictDecode(from: input, into: output)
107+
}
84108
}
85109
}
86110
}
@@ -95,7 +119,10 @@ public enum Base32 {
95119
}
96120

97121
/// Base32 decode a buffer to an array of UInt8
98-
public static func decode<Buffer: Collection>(bytes: Buffer) throws -> [UInt8] where Buffer.Element == UInt8 {
122+
public static func decode<Buffer: Collection>(
123+
bytes: Buffer,
124+
options: DecodingOptions = []
125+
) throws -> [UInt8] where Buffer.Element == UInt8 {
99126
guard bytes.count > 0 else {
100127
return []
101128
}
@@ -104,7 +131,11 @@ public enum Base32 {
104131
let outputLength = ((input.count + 7) / 8) * 5
105132

106133
return try [UInt8](unsafeUninitializedCapacity: outputLength) { output, length in
107-
length = try Self._decode(from: input, into: output)
134+
if options.contains(.allowNullCharacters) {
135+
length = try Self._decode(from: input[...], into: output[...])
136+
} else {
137+
length = try Self._strictDecode(from: input, into: output)
138+
}
108139
}
109140
}
110141

@@ -153,27 +184,98 @@ extension Base32 {
153184
/* F8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
154185
]
155186

187+
// Lookup table used by `_strictDecode`, indexed by input byte value (256 entries).
// Entries 0x00...0x1F are the decoded 5-bit value ('A'-'Z' and 'a'-'z' map to
// 0x00...0x19, '2'-'7' map to 0x1A...0x1F). 0xC0 marks '=' (0x3D) and 0x80
// marks every other byte, including 0x00. Both markers have bits set outside
// 0x1F, which is what the fast path tests for.
private static let strictDecodeTable: [UInt8] = [
188+
/* 00 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
189+
/* 08 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
190+
/* 10 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
191+
/* 18 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
192+
/* 20 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
193+
/* 28 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
194+
/* 30 */ 0x80, 0x80, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
195+
/* 38 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0xC0, 0x80, 0x80,
196+
/* 40 */ 0x80, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
197+
/* 48 */ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
198+
/* 50 */ 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
199+
/* 58 */ 0x17, 0x18, 0x19, 0x80, 0x80, 0x80, 0x80, 0x80,
200+
/* 60 */ 0x80, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
201+
/* 68 */ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
202+
/* 70 */ 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
203+
/* 78 */ 0x17, 0x18, 0x19, 0x80, 0x80, 0x80, 0x80, 0x80,
204+
205+
/* 80 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
206+
/* 88 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
207+
/* 90 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
208+
/* 98 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
209+
/* A0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
210+
/* A8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
211+
/* B0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
212+
/* B8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
213+
/* C0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
214+
/* C8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
215+
/* D0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
216+
/* D8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
217+
/* E0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
218+
/* E8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
219+
/* F0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
220+
/* F8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
221+
]
222+
156223
// Maps a 5-bit value (0...31) to its output byte: 0x41...0x5A (ASCII 'A'-'Z')
// for 0...25, then 0x32...0x37 (ASCII '2'-'7') for 26...31.
private static let encodeTable: [UInt8] = [
157224
/* 00 */ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
158225
/* 08 */ 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
159226
/* 10 */ 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
160227
/* 18 */ 0x59, 0x5A, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
161228
]
162229

163-
private static func _decode(from input: UnsafeBufferPointer<UInt8>, into output: UnsafeMutableBufferPointer<UInt8>) throws -> Int {
230+
/// Decode Base32 assuming there are no null characters
///
/// Full 8-character blocks, except the last block of the input, are decoded
/// by a fast path that writes 5 bytes per block. The remaining input is handed
/// to `_decode`.
///
/// - Parameters:
///   - input: Bytes of the Base32 text.
///   - output: Destination buffer; the callers in this file size it as
///     `((input.count + 7) / 8) * 5` or a similar capacity.
/// - Returns: The value returned by `_decode` for the tail, which starts
///   counting from the output index reached by the fast path.
/// - Throws: `DecodingError.invalidCharacter` if any byte in a fast-path block
///   maps to a table entry with bits outside `0x1F`, or if `_decode` throws.
231+
private static func _strictDecode(from input: UnsafeBufferPointer<UInt8>, into output: UnsafeMutableBufferPointer<UInt8>) throws -> Int {
164232
guard input.count != 0 else { return 0 }
165233

166-
var bitsLeft = 0
167-
var buffer: UInt32 = 0
168234
var outputIndex = 0
235+
// Work out how many bytes can go through the fast path. `(count - 1) & ~0x7`
236+
// rounds `count - 1` down to a multiple of 8, so the last block (1 to 8 bytes) is always left for the slow path.
237+
let inputMinusLastBlock = (input.count - 1) & ~0x7
169238
var i = 0
170-
loop: while i < input.count {
239+
while i < inputMinusLastBlock {
240+
let v1 = self.strictDecodeTable[Int(input[i])]
241+
let v2 = self.strictDecodeTable[Int(input[i + 1])]
242+
let v3 = self.strictDecodeTable[Int(input[i + 2])]
243+
let v4 = self.strictDecodeTable[Int(input[i + 3])]
244+
let v5 = self.strictDecodeTable[Int(input[i + 4])]
245+
let v6 = self.strictDecodeTable[Int(input[i + 5])]
246+
let v7 = self.strictDecodeTable[Int(input[i + 6])]
247+
let v8 = self.strictDecodeTable[Int(input[i + 7])]
248+
// Valid symbols decode to 0x00...0x1F; the table's 0x80 and 0xC0 markers set
// higher bits, so OR-ing all eight values lets one mask test reject the block.
let vCombined = v1 | v2 | v3 | v4 | v5 | v6 | v7 | v8
249+
if (vCombined & ~0x1F) != 0 {
250+
throw DecodingError.invalidCharacter
251+
}
252+
i += 8
253+
// Pack the eight 5-bit values (40 bits) into five output bytes.
output[outputIndex] = (v1 << 3) | (v2 >> 2)
254+
output[outputIndex + 1] = (v2 << 6) | (v3 << 1) | (v4 >> 4)
255+
output[outputIndex + 2] = (v4 << 4) | (v5 >> 1)
256+
output[outputIndex + 3] = (v5 << 7) | (v6 << 2) | (v7 >> 3)
257+
output[outputIndex + 4] = (v7 << 5) | v8
258+
outputIndex += 5
259+
}
260+
261+
// Decode the tail with the general decoder. It is given the slices starting at
// `i` and `outputIndex` and initialises its own output index from the slice's startIndex.
return try self._decode(from: input[i...], into: output[outputIndex...])
262+
}
263+
264+
/// Decode Base32 with the possibility of null characters or padding
265+
private static func _decode(from input: UnsafeBufferPointer<UInt8>.SubSequence, into output: UnsafeMutableBufferPointer<UInt8>.SubSequence) throws -> Int {
266+
guard input.count != 0 else { return output.startIndex }
267+
var output = output
268+
var bitsLeft = 0
269+
var buffer: UInt32 = 0
270+
var outputIndex = output.startIndex
271+
var i = input.startIndex
272+
loop: while i < input.endIndex {
171273
let index = Int(input[i])
172274
i += 1
173275
let v = self.decodeTable[index]
174276
switch v {
175277
case 0x80:
176-
throw DecodingError.invalidCharacter(UInt8(index))
278+
throw DecodingError.invalidCharacter
177279
case 0x40:
178280
continue
179281
case 0xC0:
@@ -191,9 +293,9 @@ extension Base32 {
191293
}
192294
}
193295
// Any characters left should be padding
194-
while i < input.count {
296+
while i < input.endIndex {
195297
let index = Int(input[i])
196-
guard self.decodeTable[index] == 0xC0 else { throw DecodingError.invalidCharacter(UInt8(index)) }
298+
guard self.decodeTable[index] == 0xC0 else { throw DecodingError.invalidCharacter }
197299
i += 1
198300
}
199301
return outputIndex

Sources/ExtrasBase64/Base64.swift

+17-5
Original file line numberDiff line numberDiff line change
@@ -334,11 +334,23 @@ extension Base64 {
334334
public static let omitPaddingCharacter = DecodingOptions(rawValue: UInt(1 << 1))
335335
}
336336

337-
public enum DecodingError: Error, Equatable {
338-
case invalidLength
339-
case invalidCharacter(UInt8)
340-
case unexpectedPaddingCharacter
341-
case unexpectedEnd
337+
/// Error type for Base64 decoding failures.
///
/// The error is a struct wrapping a file-private enum; values are created
/// through the public static members below and compared via `Equatable`.
public struct DecodingError: Error, Equatable {
338+
// Backing storage for the error kind; file-private, so not part of the public API.
fileprivate enum _Internal: Error, Equatable {
339+
case invalidLength
340+
case invalidCharacter(UInt8)
341+
case unexpectedPaddingCharacter
342+
case unexpectedEnd
343+
}
344+
345+
fileprivate let value: _Internal
346+
fileprivate init(_ value: _Internal) {
347+
self.value = value
348+
}
349+
350+
/// Error value wrapping the `invalidLength` kind.
public static var invalidLength: Self { .init(.invalidLength) }
351+
/// Error value wrapping the `invalidCharacter` kind with the given byte.
/// NOTE(review): presumably the offending input byte — confirm against the decoder.
public static func invalidCharacter(_ character: UInt8) -> Self { .init(.invalidCharacter(character)) }
352+
/// Error value wrapping the `unexpectedPaddingCharacter` kind.
public static var unexpectedPaddingCharacter: Self { .init(.unexpectedPaddingCharacter) }
353+
/// Error value wrapping the `unexpectedEnd` kind.
public static var unexpectedEnd: Self { .init(.unexpectedEnd) }
342354
}
343355

344356
@inlinable

Tests/ExtrasBase64Tests/Base32Tests.swift

+51-2
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,29 @@ class Base32Tests: XCTestCase {
5454
XCTAssertEqual(decoded, expected)
5555
}
5656

57+
/// Decodes a Base32 string that is split across several lines. With
/// `.allowNullCharacters` it must decode to the bytes 0...255; with the
/// default options the same input must throw.
func testBase32DecodingWithNullCharacters() {
58+
let base32 = """
59+
AAAQEAYEAUDAOCAJBIFQYDIOB4IBCEQTCQKRMFYYDENBWHA5D
60+
YPSAIJCEMSCKJRHFAUSUKZMFUXC6MBRGIZTINJWG44DSOR3HQ
61+
6T4P2AIFBEGRCFIZDUQSKKJNGE2TSPKBIVEU2UKVLFOWCZLJN
62+
VYXK6L5QGCYTDMRSWMZ3INFVGW3DNNZXXA4LSON2HK5TXPB4X
63+
U634PV7H7AEBQKBYJBMGQ6EITCULRSGY5D4QSGJJHFEVS2LZR
64+
GM2TOOJ3HU7UCQ2FI5EUWTKPKFJVKV2ZLNOV6YLDMVTWS23NN
65+
5YXG5LXPF5X274BQOCYPCMLRWHZDE4VS6MZXHM7UGR2LJ5JVO
66+
W27MNTWW33TO55X7A4HROHZHF43T6R2PK5PWO33XP6DY7F47U
67+
6X3PP6HZ7L57Z7P674
68+
"""
69+
70+
let expected = Array(UInt8(0) ... UInt8(255))
71+
var decoded: [UInt8]?
72+
XCTAssertNoThrow(decoded = try Base32.decode(bytes: base32.utf8, options: .allowNullCharacters))
73+
XCTAssertEqual(decoded, expected)
74+
// Default options take the strict path, which is expected to reject this input.
XCTAssertThrowsError(decoded = try Base32.decode(bytes: base32.utf8)) { _ in }
75+
}
76+
5777
func testBase32DecodingWithPoop() {
5878
XCTAssertThrowsError(_ = try Base32.decode(bytes: "💩".utf8)) { error in
59-
XCTAssertEqual(error as? Base32.DecodingError, .invalidCharacter(240))
79+
XCTAssertEqual(error as? Base32.DecodingError, .invalidCharacter)
6080
}
6181
}
6282

@@ -103,12 +123,41 @@ class Base32Tests: XCTestCase {
103123
}
104124

105125
func testBase32EncodeFoobarWithPadding() {
106-
XCTAssertEqual(String(base32Encoding: "".utf8), "")
107126
XCTAssertEqual(String(base32Encoding: "f".utf8), "MY======")
108127
XCTAssertEqual(String(base32Encoding: "fo".utf8), "MZXQ====")
109128
XCTAssertEqual(String(base32Encoding: "foo".utf8), "MZXW6===")
110129
XCTAssertEqual(String(base32Encoding: "foob".utf8), "MZXW6YQ=")
111130
XCTAssertEqual(String(base32Encoding: "fooba".utf8), "MZXW6YTB")
112131
XCTAssertEqual(String(base32Encoding: "foobar".utf8), "MZXW6YTBOI======")
113132
}
133+
134+
/// Decodes unpadded Base32 encodings of the prefixes of "foobar" (including the
/// empty string) with default options and checks the resulting bytes.
func testBase32DecodeFoobar() {
135+
XCTAssertEqual(try "".base32decoded(), .init("".utf8))
136+
XCTAssertEqual(try "MY".base32decoded(), .init("f".utf8))
137+
XCTAssertEqual(try "MZXQ".base32decoded(), .init("fo".utf8))
138+
XCTAssertEqual(try "MZXW6".base32decoded(), .init("foo".utf8))
139+
XCTAssertEqual(try "MZXW6YQ".base32decoded(), .init("foob".utf8))
140+
XCTAssertEqual(try "MZXW6YTB".base32decoded(), .init("fooba".utf8))
141+
XCTAssertEqual(try "MZXW6YTBOI".base32decoded(), .init("foobar".utf8))
142+
}
143+
144+
/// Decodes '='-padded Base32 encodings of the non-empty prefixes of "foobar"
/// with default options and checks the resulting bytes.
func testBase32DecodeFoobarWithPadding() {
145+
XCTAssertEqual(try "MY======".base32decoded(), .init("f".utf8))
146+
XCTAssertEqual(try "MZXQ====".base32decoded(), .init("fo".utf8))
147+
XCTAssertEqual(try "MZXW6===".base32decoded(), .init("foo".utf8))
148+
XCTAssertEqual(try "MZXW6YQ=".base32decoded(), .init("foob".utf8))
149+
XCTAssertEqual(try "MZXW6YTB".base32decoded(), .init("fooba".utf8))
150+
XCTAssertEqual(try "MZXW6YTBOI======".base32decoded(), .init("foobar".utf8))
151+
}
152+
153+
/// Round-trip test: encodes 100 random buffers of 1 to 8191 random bytes and
/// checks that decoding returns the original bytes both with
/// `.allowNullCharacters` and with the default options.
func testBase32EncodeDecode() throws {
154+
for _ in 0 ..< 100 {
155+
let buffer: [UInt8] = (0 ..< Int.random(in: 1 ..< 8192)).map { _ in UInt8.random(in: .min ... .max) }
156+
let base32 = String(base32Encoding: buffer)
157+
// buffer2 is decoded with `.allowNullCharacters`, buffer3 with default options.
let buffer2 = try base32.base32decoded(options: .allowNullCharacters)
158+
let buffer3 = try base32.base32decoded()
159+
XCTAssertEqual(buffer, buffer2)
160+
XCTAssertEqual(buffer, buffer3)
161+
}
162+
}
114163
}

0 commit comments

Comments
 (0)