@@ -34,6 +34,9 @@ type Bitmap struct {
34
34
data []uint16
35
35
keys node
36
36
37
+ // This _ptr is only used when we start with a []byte instead of a
38
+ // []uint16. In that case data comes from an unsafe conversion to []uint16, and hence
39
+ // we do NOT own a valid pointer to the underlying array.
37
40
_ptr []byte
38
41
39
42
// memMoved keeps track of how many uint16 moves we had to do. The smaller
@@ -44,33 +47,34 @@ type Bitmap struct {
44
47
// FromBuffer returns a pointer to bitmap corresponding to the given buffer. This bitmap shouldn't
45
48
// be modified because it might corrupt the given buffer.
46
49
func FromBuffer (data []byte ) * Bitmap {
50
+ assert (len (data )% 2 == 0 )
47
51
if len (data ) < 8 {
48
52
return NewBitmap ()
49
53
}
50
54
du := toUint16Slice (data )
51
55
x := toUint64Slice (du [:4 ])[indexNodeSize ]
52
56
return & Bitmap {
53
57
data : du ,
54
- _ptr : data ,
58
+ _ptr : data , // Keep a hold of data, otherwise GC would do its thing.
55
59
keys : toUint64Slice (du [:x ]),
56
60
}
57
61
}
58
62
59
63
// FromBufferWithCopy creates a copy of the given buffer and returns a bitmap based on the copied
60
64
// buffer. This bitmap is safe for both read and write operations.
61
- func FromBufferWithCopy (data []byte ) * Bitmap {
62
- if len (data ) < 8 {
65
+ func FromBufferWithCopy (src []byte ) * Bitmap {
66
+ assert (len (src )% 2 == 0 )
67
+ if len (src ) < 8 {
63
68
return NewBitmap ()
64
69
}
65
- dup := make ([] byte , len ( data ) )
66
- copy ( dup , data )
67
- du := toUint16Slice ( dup )
68
- x := toUint64Slice (du [:4 ])[indexNodeSize ]
70
+ src16 := toUint16Slice ( src )
71
+ dst16 := make ([] uint16 , len ( src16 ) )
72
+ copy ( dst16 , src16 )
73
+ x := toUint64Slice (dst16 [:4 ])[indexNodeSize ]
69
74
70
75
return & Bitmap {
71
- data : du ,
72
- _ptr : dup ,
73
- keys : toUint64Slice (du [:x ]),
76
+ data : dst16 ,
77
+ keys : toUint64Slice (dst16 [:x ]),
74
78
}
75
79
}
76
80
@@ -117,6 +121,25 @@ func NewBitmapWith(numKeys int) *Bitmap {
117
121
return ra
118
122
}
119
123
124
+ func (ra * Bitmap ) initSpaceForKeys (N int ) {
125
+ if N == 0 {
126
+ return
127
+ }
128
+ curSize := uint64 (len (ra .keys ) * 4 ) // U64 -> U16
129
+ bySize := uint64 (N * 8 ) // 2xU64 (key, value) -> 2x4xU16
130
+
131
+ // The following code is borrowed from setKey.
132
+ ra .scootRight (curSize , bySize )
133
+ ra .keys = toUint64Slice (ra .data [:curSize + bySize ])
134
+ ra .keys .setNodeSize (int (curSize + bySize ))
135
+ assert (1 == ra .keys .numKeys ()) // This initialization assumes that the number of keys are 1.
136
+
137
+ // The containers have moved to the right bySize. So, update their offsets.
138
+ // Currently, there's only one container.
139
+ val := ra .keys .val (0 )
140
+ ra .keys .setAt (valOffset (0 ), val + uint64 (bySize ))
141
+ }
142
+
120
143
// setKey sets a key and container offset.
121
144
func (ra * Bitmap ) setKey (k uint64 , offset uint64 ) uint64 {
122
145
if added := ra .keys .set (k , offset ); ! added {
@@ -135,7 +158,7 @@ func (ra *Bitmap) setKey(k uint64, offset uint64) uint64 {
135
158
bySize = math .MaxUint16
136
159
}
137
160
138
- ra .scootRight (curSize , uint16 ( bySize ) )
161
+ ra .scootRight (curSize , bySize )
139
162
ra .keys = toUint64Slice (ra .data [:curSize + bySize ])
140
163
ra .keys .setNodeSize (int (curSize + bySize ))
141
164
@@ -151,7 +174,7 @@ func (ra *Bitmap) setKey(k uint64, offset uint64) uint64 {
151
174
return offset + bySize
152
175
}
153
176
154
- func (ra * Bitmap ) fastExpand (bySize uint16 ) {
177
+ func (ra * Bitmap ) fastExpand (bySize uint64 ) {
155
178
prev := len (ra .keys ) * 4 // Multiply by 4 to convert from u16 to u64.
156
179
157
180
// This following statement also works. But, given how much fastExpand gets
@@ -170,14 +193,15 @@ func (ra *Bitmap) fastExpand(bySize uint16) {
170
193
out := make ([]uint16 , cap (ra .data )+ growBy )
171
194
copy (out , ra .data )
172
195
ra .data = out [:toSize ]
196
+ ra ._ptr = nil // Allow Go to GC whatever this was pointing to.
173
197
// Re-reference ra.keys correctly because underlying array has changed.
174
198
ra .keys = toUint64Slice (ra .data [:prev ])
175
199
}
176
200
177
201
// scootRight isn't aware of containers. It's going to create empty space of
178
202
// bySize at the given offset in ra.data. The offset doesn't need to line up
179
203
// with a container.
180
- func (ra * Bitmap ) scootRight (offset uint64 , bySize uint16 ) {
204
+ func (ra * Bitmap ) scootRight (offset uint64 , bySize uint64 ) {
181
205
left := ra .data [offset :]
182
206
183
207
ra .fastExpand (bySize ) // Expand the buffer.
@@ -198,7 +222,7 @@ func (ra *Bitmap) scootLeft(offset uint64, size uint64) {
198
222
199
223
func (ra * Bitmap ) newContainer (sz uint16 ) uint64 {
200
224
offset := uint64 (len (ra .data ))
201
- ra .fastExpand (sz )
225
+ ra .fastExpand (uint64 ( sz ) )
202
226
Memclr (ra .data [offset : offset + uint64 (sz )])
203
227
ra .data [offset ] = sz
204
228
return offset
@@ -223,7 +247,7 @@ func (ra *Bitmap) expandContainer(offset uint64) {
223
247
}
224
248
225
249
// Select the portion to the right of the container, beyond its right boundary.
226
- ra .scootRight (offset + uint64 (sz ), bySize )
250
+ ra .scootRight (offset + uint64 (sz ), uint64 ( bySize ) )
227
251
ra .keys .updateOffsets (offset , uint64 (bySize ), true )
228
252
229
253
if sz < 2048 {
@@ -274,7 +298,7 @@ func (ra *Bitmap) copyAt(offset uint64, src []uint16) {
274
298
assert (src [indexSize ] == maxContainerSize )
275
299
bySize := uint16 (maxContainerSize ) - dstSize
276
300
// Select the portion to the right of the container, beyond its right boundary.
277
- ra .scootRight (offset + uint64 (dstSize ), bySize )
301
+ ra .scootRight (offset + uint64 (dstSize ), uint64 ( bySize ) )
278
302
ra .keys .updateOffsets (offset , uint64 (bySize ), true )
279
303
assert (copy (ra .data [offset :], src ) == len (src ))
280
304
return
@@ -300,7 +324,7 @@ func (ra *Bitmap) copyAt(offset uint64, src []uint16) {
300
324
301
325
bySize := uint16 (maxContainerSize ) - dstSize
302
326
// Select the portion to the right of the container, beyond its right boundary.
303
- ra .scootRight (offset + uint64 (dstSize ), bySize )
327
+ ra .scootRight (offset + uint64 (dstSize ), uint64 ( bySize ) )
304
328
ra .keys .updateOffsets (offset , uint64 (bySize ), true )
305
329
306
330
// Update the space of the container, so getContainer would work correctly.
@@ -315,7 +339,7 @@ func (ra *Bitmap) copyAt(offset uint64, src []uint16) {
315
339
316
340
// targetSize is not maxSize. Let's expand to targetSize and copy array.
317
341
bySize := targetSz - dstSize
318
- ra .scootRight (offset + uint64 (dstSize ), bySize )
342
+ ra .scootRight (offset + uint64 (dstSize ), uint64 ( bySize ) )
319
343
ra .keys .updateOffsets (offset , uint64 (bySize ), true )
320
344
assert (copy (ra .data [offset :], src ) == len (src ))
321
345
ra .data [offset ] = targetSz
@@ -390,14 +414,15 @@ func FromSortedList(vals []uint64) *Bitmap {
390
414
}
391
415
392
416
// Set the keys beforehand so that we don't need to move a lot of memory because of adding keys.
417
+ var numKeys int
393
418
for _ , x := range vals {
394
419
hi = x & mask
395
420
if hi != 0 && hi != lastHi {
396
- ra . setKey ( lastHi , 0 )
421
+ numKeys ++
397
422
}
398
423
lastHi = hi
399
424
}
400
- ra .setKey ( lastHi , 0 )
425
+ ra .initSpaceForKeys ( numKeys )
401
426
402
427
finalize := func (l []uint16 , key uint64 ) {
403
428
if len (l ) == 0 {
@@ -1184,3 +1209,111 @@ func FastOr(bitmaps ...*Bitmap) *Bitmap {
1184
1209
1185
1210
return dst
1186
1211
}
1212
+
1213
+ // Split splits the bitmap based on maxSz and the externalSize function. It splits the bitmap
1214
+ // such that size of each split bitmap + external size corresponding to its elements approximately
1215
+ // equal to maxSz (it can be greater than maxSz sometimes). The splits are returned in sorted order.
1216
+ // externalSize is a function that should return the external size corresponding to elements in
1217
+ // range [start, end). External size is used to calculate the split boundaries.
1218
+ func (bm * Bitmap ) Split (externalSize func (start , end uint64 ) uint64 , maxSz uint64 ) []* Bitmap {
1219
+ splitFurther := func (b * Bitmap ) []* Bitmap {
1220
+ itr := b .NewIterator ()
1221
+ newBm := NewBitmap ()
1222
+ var sz uint64
1223
+ var bms []* Bitmap
1224
+ for id := itr .Next (); id != 0 ; id = itr .Next () {
1225
+ sz += externalSize (id , addUint64 (id , 1 ))
1226
+ newBm .Set (id )
1227
+ if sz >= maxSz {
1228
+ bms = append (bms , newBm )
1229
+ newBm = NewBitmap ()
1230
+ sz = 0
1231
+ }
1232
+ }
1233
+
1234
+ if ! newBm .IsEmpty () {
1235
+ bms = append (bms , newBm )
1236
+ }
1237
+ return bms
1238
+ }
1239
+
1240
+ create := func (keyToOffset map [uint64 ]uint64 , totalSz uint64 ) []* Bitmap {
1241
+ var keys []uint64
1242
+ for key := range keyToOffset {
1243
+ keys = append (keys , key )
1244
+ }
1245
+ sort .Slice (keys , func (i , j int ) bool {
1246
+ return keys [i ] < keys [j ]
1247
+ })
1248
+
1249
+ newBm := NewBitmap ()
1250
+
1251
+ // First set all the keys.
1252
+ var containerSz uint64
1253
+ for _ , key := range keys {
1254
+ newBm .setKey (key , 0 )
1255
+
1256
+ // Calculate the size of the containers.
1257
+ cont := bm .getContainer (keyToOffset [key ])
1258
+ containerSz += uint64 (len (cont ))
1259
+ }
1260
+ // Allocate enough space to hold all the containers.
1261
+ beforeSize := len (newBm .data )
1262
+ newBm .fastExpand (containerSz )
1263
+ newBm .data = newBm .data [:beforeSize ]
1264
+
1265
+ // Now, we can populate the containers. For that, we first expand the
1266
+ // bitmap. Calculate the total size we need to allocate all these containers.
1267
+ for _ , key := range keys {
1268
+ cont := bm .getContainer (keyToOffset [key ])
1269
+ off := newBm .newContainer (uint16 (len (cont )))
1270
+ copy (newBm .data [off :], cont )
1271
+
1272
+ newBm .setKey (key , off )
1273
+ }
1274
+
1275
+ if newBm .GetCardinality () == 0 {
1276
+ return nil
1277
+ }
1278
+
1279
+ if totalSz > maxSz {
1280
+ return splitFurther (newBm )
1281
+ }
1282
+
1283
+ return []* Bitmap {newBm }
1284
+ }
1285
+
1286
+ var splits []* Bitmap
1287
+
1288
+ containerMap := make (map [uint64 ]uint64 )
1289
+ var totalSz uint64 // size of containers plus the external size of the container
1290
+
1291
+ for i := 0 ; i < bm .keys .numKeys (); i ++ {
1292
+ key := bm .keys .key (i )
1293
+ off := bm .keys .val (i )
1294
+ cont := bm .getContainer (off )
1295
+
1296
+ start , end := key , addUint64 (key , 1 << 16 )
1297
+ sz := externalSize (start , end ) + 2 * uint64 (cont [indexSize ]) // Converting to bytes.
1298
+
1299
+ // We can probably append more containers in the same bucket.
1300
+ if totalSz + sz < maxSz || len (containerMap ) == 0 {
1301
+ // Include this container in the container map.
1302
+ containerMap [key ] = off
1303
+ totalSz += sz
1304
+ continue
1305
+ }
1306
+
1307
+ // We have reached the maxSz limit. Hence, create a split.
1308
+ splits = append (splits , create (containerMap , totalSz )... )
1309
+
1310
+ containerMap = make (map [uint64 ]uint64 )
1311
+ containerMap [key ] = off
1312
+ totalSz = sz
1313
+ }
1314
+ if len (containerMap ) > 0 {
1315
+ splits = append (splits , create (containerMap , totalSz )... )
1316
+ }
1317
+
1318
+ return splits
1319
+ }
0 commit comments