Skip to content

Commit 7cc0ecb

Browse files
authored
Merge pull request #371 from nmslib/develop
Merge 0.6.2 to master
2 parents 21e20f3 + 9d933ac commit 7cc0ecb

File tree

4 files changed

+82
-36
lines changed

4 files changed

+82
-36
lines changed

README.md

+9-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ Header-only C++ HNSW implementation with python bindings.
44
**NEWS:**
55

66

7+
**version 0.6.2**
8+
9+
* Fixed a bug in saving large pickles: pickles larger than 4GB could have been corrupted. Thanks to Kai Wohlfahrt for reporting.
10+
* Thanks to ([@GuyAv46](https://github.com/GuyAv46)) hnswlib inner product is now more consistent across architectures (SSE, AVX, etc).
11+
*
12+
713
**version 0.6.1**
814

915
* Thanks to ([@tony-kuo](https://github.com/tony-kuo)) hnswlib AVX512 and AVX builds are now backwards-compatible with older SSE and non-AVX512 architectures.
@@ -235,6 +241,9 @@ or you can install via pip:
235241

236242

237243
### For developers
244+
Contributions are highly welcome!
245+
246+
Please make pull requests against the `develop` branch.
238247

239248
When making changes please run tests (and please add a test to `python_bindings/tests` in case there is new functionality):
240249
```bash
@@ -259,10 +268,6 @@ https://github.com/dbaranchuk/ivf-hnsw
259268
* .Net implementation: https://github.com/microsoft/HNSW.Net
260269
* CUDA implementation: https://github.com/js1010/cuhnsw
261270
262-
### Contributing to the repository
263-
Contributions are highly welcome!
264-
265-
Please make pull requests against the `develop` branch.
266271
267272
### 200M SIFT test reproduction
268273
To download and extract the bigann dataset (from root directory):

hnswlib/space_ip.h

+61-23
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,15 @@ namespace hnswlib {
1010
for (unsigned i = 0; i < qty; i++) {
1111
res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
1212
}
13-
return (1.0f - res);
13+
return res;
1414

1515
}
1616

17+
static float
18+
InnerProductDistance(const void *pVect1, const void *pVect2, const void *qty_ptr) {
19+
return 1.0f - InnerProduct(pVect1, pVect2, qty_ptr);
20+
}
21+
1722
#if defined(USE_AVX)
1823

1924
// Favor using AVX if available.
@@ -61,8 +66,13 @@ namespace hnswlib {
6166

6267
_mm_store_ps(TmpRes, sum_prod);
6368
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
64-
return 1.0f - sum;
65-
}
69+
return sum;
70+
}
71+
72+
static float
73+
InnerProductDistanceSIMD4ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
74+
return 1.0f - InnerProductSIMD4ExtAVX(pVect1v, pVect2v, qty_ptr);
75+
}
6676

6777
#endif
6878

@@ -121,7 +131,12 @@ namespace hnswlib {
121131
_mm_store_ps(TmpRes, sum_prod);
122132
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
123133

124-
return 1.0f - sum;
134+
return sum;
135+
}
136+
137+
static float
138+
InnerProductDistanceSIMD4ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
139+
return 1.0f - InnerProductSIMD4ExtSSE(pVect1v, pVect2v, qty_ptr);
125140
}
126141

127142
#endif
@@ -156,7 +171,12 @@ namespace hnswlib {
156171
_mm512_store_ps(TmpRes, sum512);
157172
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];
158173

159-
return 1.0f - sum;
174+
return sum;
175+
}
176+
177+
static float
178+
InnerProductDistanceSIMD16ExtAVX512(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
179+
return 1.0f - InnerProductSIMD16ExtAVX512(pVect1v, pVect2v, qty_ptr);
160180
}
161181

162182
#endif
@@ -196,15 +216,20 @@ namespace hnswlib {
196216
_mm256_store_ps(TmpRes, sum256);
197217
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
198218

199-
return 1.0f - sum;
219+
return sum;
220+
}
221+
222+
static float
223+
InnerProductDistanceSIMD16ExtAVX(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
224+
return 1.0f - InnerProductSIMD16ExtAVX(pVect1v, pVect2v, qty_ptr);
200225
}
201226

202227
#endif
203228

204229
#if defined(USE_SSE)
205230

206-
static float
207-
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
231+
static float
232+
InnerProductSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
208233
float PORTABLE_ALIGN32 TmpRes[8];
209234
float *pVect1 = (float *) pVect1v;
210235
float *pVect2 = (float *) pVect2v;
@@ -245,17 +270,24 @@ namespace hnswlib {
245270
_mm_store_ps(TmpRes, sum_prod);
246271
float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
247272

248-
return 1.0f - sum;
273+
return sum;
274+
}
275+
276+
static float
277+
InnerProductDistanceSIMD16ExtSSE(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
278+
return 1.0f - InnerProductSIMD16ExtSSE(pVect1v, pVect2v, qty_ptr);
249279
}
250280

251281
#endif
252282

253283
#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
254284
DISTFUNC<float> InnerProductSIMD16Ext = InnerProductSIMD16ExtSSE;
255285
DISTFUNC<float> InnerProductSIMD4Ext = InnerProductSIMD4ExtSSE;
286+
DISTFUNC<float> InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtSSE;
287+
DISTFUNC<float> InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtSSE;
256288

257289
static float
258-
InnerProductSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
290+
InnerProductDistanceSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
259291
size_t qty = *((size_t *) qty_ptr);
260292
size_t qty16 = qty >> 4 << 4;
261293
float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
@@ -264,11 +296,11 @@ namespace hnswlib {
264296

265297
size_t qty_left = qty - qty16;
266298
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
267-
return res + res_tail - 1.0f;
299+
return 1.0f - (res + res_tail);
268300
}
269301

270302
static float
271-
InnerProductSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
303+
InnerProductDistanceSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
272304
size_t qty = *((size_t *) qty_ptr);
273305
size_t qty4 = qty >> 2 << 2;
274306

@@ -279,7 +311,7 @@ namespace hnswlib {
279311
float *pVect2 = (float *) pVect2v + qty4;
280312
float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
281313

282-
return res + res_tail - 1.0f;
314+
return 1.0f - (res + res_tail);
283315
}
284316
#endif
285317

@@ -290,30 +322,37 @@ namespace hnswlib {
290322
size_t dim_;
291323
public:
292324
InnerProductSpace(size_t dim) {
293-
fstdistfunc_ = InnerProduct;
325+
fstdistfunc_ = InnerProductDistance;
294326
#if defined(USE_AVX) || defined(USE_SSE) || defined(USE_AVX512)
295327
#if defined(USE_AVX512)
296-
if (AVX512Capable())
328+
if (AVX512Capable()) {
297329
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX512;
298-
else if (AVXCapable())
330+
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX512;
331+
} else if (AVXCapable()) {
299332
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
333+
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
334+
}
300335
#elif defined(USE_AVX)
301-
if (AVXCapable())
336+
if (AVXCapable()) {
302337
InnerProductSIMD16Ext = InnerProductSIMD16ExtAVX;
338+
InnerProductDistanceSIMD16Ext = InnerProductDistanceSIMD16ExtAVX;
339+
}
303340
#endif
304341
#if defined(USE_AVX)
305-
if (AVXCapable())
342+
if (AVXCapable()) {
306343
InnerProductSIMD4Ext = InnerProductSIMD4ExtAVX;
344+
InnerProductDistanceSIMD4Ext = InnerProductDistanceSIMD4ExtAVX;
345+
}
307346
#endif
308347

309348
if (dim % 16 == 0)
310-
fstdistfunc_ = InnerProductSIMD16Ext;
349+
fstdistfunc_ = InnerProductDistanceSIMD16Ext;
311350
else if (dim % 4 == 0)
312-
fstdistfunc_ = InnerProductSIMD4Ext;
351+
fstdistfunc_ = InnerProductDistanceSIMD4Ext;
313352
else if (dim > 16)
314-
fstdistfunc_ = InnerProductSIMD16ExtResiduals;
353+
fstdistfunc_ = InnerProductDistanceSIMD16ExtResiduals;
315354
else if (dim > 4)
316-
fstdistfunc_ = InnerProductSIMD4ExtResiduals;
355+
fstdistfunc_ = InnerProductDistanceSIMD4ExtResiduals;
317356
#endif
318357
dim_ = dim;
319358
data_size_ = dim * sizeof(float);
@@ -334,5 +373,4 @@ namespace hnswlib {
334373
~InnerProductSpace() {}
335374
};
336375

337-
338376
}

python_bindings/bindings.cpp

+9-9
Original file line numberDiff line numberDiff line change
@@ -292,12 +292,12 @@ class Index {
292292
py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */
293293
std::unique_lock <std::mutex> templock(appr_alg->global);
294294

295-
unsigned int level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_;
296-
unsigned int link_npy_size = 0;
297-
std::vector<unsigned int> link_npy_offsets(appr_alg->cur_element_count);
295+
size_t level0_npy_size = appr_alg->cur_element_count * appr_alg->size_data_per_element_;
296+
size_t link_npy_size = 0;
297+
std::vector<size_t> link_npy_offsets(appr_alg->cur_element_count);
298298

299299
for (size_t i = 0; i < appr_alg->cur_element_count; i++){
300-
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
300+
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
301301
link_npy_offsets[i]=link_npy_size;
302302
if (linkListSize)
303303
link_npy_size += linkListSize;
@@ -326,7 +326,7 @@ class Index {
326326
memcpy(element_levels_npy, appr_alg->element_levels_.data(), appr_alg->element_levels_.size() * sizeof(int));
327327

328328
for (size_t i = 0; i < appr_alg->cur_element_count; i++){
329-
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
329+
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
330330
if (linkListSize){
331331
memcpy(link_list_npy+link_npy_offsets[i], appr_alg->linkLists_[i], linkListSize);
332332
}
@@ -500,11 +500,11 @@ class Index {
500500

501501
memcpy(appr_alg->element_levels_.data(), element_levels_npy.data(), element_levels_npy.nbytes());
502502

503-
unsigned int link_npy_size = 0;
504-
std::vector<unsigned int> link_npy_offsets(appr_alg->cur_element_count);
503+
size_t link_npy_size = 0;
504+
std::vector<size_t> link_npy_offsets(appr_alg->cur_element_count);
505505

506506
for (size_t i = 0; i < appr_alg->cur_element_count; i++){
507-
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
507+
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
508508
link_npy_offsets[i]=link_npy_size;
509509
if (linkListSize)
510510
link_npy_size += linkListSize;
@@ -513,7 +513,7 @@ class Index {
513513
memcpy(appr_alg->data_level0_memory_, data_level0_npy.data(), data_level0_npy.nbytes());
514514

515515
for (size_t i = 0; i < appr_alg->max_elements_; i++) {
516-
unsigned int linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
516+
size_t linkListSize = appr_alg->element_levels_[i] > 0 ? appr_alg->size_links_per_element_ * appr_alg->element_levels_[i] : 0;
517517
if (linkListSize == 0) {
518518
appr_alg->linkLists_[i] = nullptr;
519519
} else {

setup.py

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import sys
3+
import platform
34

45
import numpy as np
56
import pybind11
@@ -86,6 +87,8 @@ class BuildExt(build_ext):
8687
}
8788

8889
if sys.platform == 'darwin':
90+
if platform.machine() == 'arm64':
91+
c_opts['unix'].remove('-march=native')
8992
c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
9093
link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
9194
else:

0 commit comments

Comments
 (0)