|
| 1 | +# C++ examples |
| 2 | + |
| 3 | +Creating an index, inserting elements, searching, and serialization: |
| 4 | +```cpp |
| 5 | +#include "../../hnswlib/hnswlib.h" |
| 6 | + |
| 7 | + |
| 8 | +int main() { |
| 9 | + int dim = 16; // Dimension of each element (floats per vector) |
| 10 | + int max_elements = 10000; // Maximum number of elements; must be known beforehand |
| 11 | + int M = 16; // Tightly connected with the internal dimensionality of the data; |
| 12 | + // strongly affects memory consumption |
| 13 | + int ef_construction = 200; // Controls the index search speed / build speed tradeoff |
| 14 | + |
| 15 | + // Initialize the index over an L2Space of dimension dim |
| 16 | + hnswlib::L2Space space(dim); |
| 17 | + hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); |
| 18 | + |
| 19 | + // Generate random data: max_elements vectors of dim floats, RNG seeded with 47 |
| 20 | + std::mt19937 rng; |
| 21 | + rng.seed(47); |
| 22 | + std::uniform_real_distribution<> distrib_real; |
| 23 | + float* data = new float[dim * max_elements]; |
| 24 | + for (int i = 0; i < dim * max_elements; i++) { |
| 25 | + data[i] = distrib_real(rng); |
| 26 | + } |
| 27 | + |
| 28 | + // Add every vector to the index, using its position i as the label |
| 29 | + for (int i = 0; i < max_elements; i++) { |
| 30 | + alg_hnsw->addPoint(data + i * dim, i); |
| 31 | + } |
| 32 | + |
| 33 | + // Query each element for its own nearest neighbor (k = 1) and measure recall |
| 34 | + float correct = 0; |
| 35 | + for (int i = 0; i < max_elements; i++) { |
| 36 | + std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1); |
| 37 | + hnswlib::labeltype label = result.top().second; |
| 38 | + if (label == i) correct++; |
| 39 | + } |
| 40 | + float recall = correct / max_elements; |
| 41 | + std::cout << "Recall: " << recall << "\n"; |
| 42 | + |
| 43 | + // Serialize the index to hnsw_path, then delete the in-memory instance |
| 44 | + std::string hnsw_path = "hnsw.bin"; |
| 45 | + alg_hnsw->saveIndex(hnsw_path); |
| 46 | + delete alg_hnsw; |
| 47 | + |
| 48 | + // Deserialize the index from hnsw_path and check recall again |
| 49 | + alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, hnsw_path); |
| 50 | + correct = 0; |
| 51 | + for (int i = 0; i < max_elements; i++) { |
| 52 | + std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1); |
| 53 | + hnswlib::labeltype label = result.top().second; |
| 54 | + if (label == i) correct++; |
| 55 | + } |
| 56 | + recall = (float)correct / max_elements; |
| 57 | + std::cout << "Recall of deserialized index: " << recall << "\n"; |
| 58 | + |
| 59 | + delete[] data; |
| 60 | + delete alg_hnsw; |
| 61 | + return 0; |
| 62 | +} |
| 63 | +``` |
| 64 | + |
| 65 | +An example of filtering with a boolean function during the search: |
| 66 | +```cpp |
| 67 | +#include "../../hnswlib/hnswlib.h" |
| 68 | + |
| 69 | + |
| 70 | +// Filter functor that accepts only labels divisible by `divisor` (divisor must be non-zero) |
| 71 | +class PickDivisibleIds: public hnswlib::BaseFilterFunctor { |
| 72 | +unsigned int divisor = 1; |
| 73 | + public: |
| 74 | + PickDivisibleIds(unsigned int divisor): divisor(divisor) { |
| 75 | + assert(divisor != 0); |
| 76 | + } |
| 77 | + bool operator()(hnswlib::labeltype label_id) override { |
| 78 | + return label_id % divisor == 0; |
| 79 | + } |
| 80 | +}; |
| 81 | + |
| 82 | + |
| 83 | +int main() { |
| 84 | + int dim = 16; // Dimension of each element (floats per vector) |
| 85 | + int max_elements = 10000; // Maximum number of elements; must be known beforehand |
| 86 | + int M = 16; // Tightly connected with the internal dimensionality of the data; |
| 87 | + // strongly affects memory consumption |
| 88 | + int ef_construction = 200; // Controls the index search speed / build speed tradeoff |
| 89 | + |
| 90 | + // Initialize the index over an L2Space of dimension dim |
| 91 | + hnswlib::L2Space space(dim); |
| 92 | + hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); |
| 93 | + |
| 94 | + // Generate random data: max_elements vectors of dim floats, RNG seeded with 47 |
| 95 | + std::mt19937 rng; |
| 96 | + rng.seed(47); |
| 97 | + std::uniform_real_distribution<> distrib_real; |
| 98 | + float* data = new float[dim * max_elements]; |
| 99 | + for (int i = 0; i < dim * max_elements; i++) { |
| 100 | + data[i] = distrib_real(rng); |
| 101 | + } |
| 102 | + |
| 103 | + // Add every vector to the index, using its position i as the label |
| 104 | + for (int i = 0; i < max_elements; i++) { |
| 105 | + alg_hnsw->addPoint(data + i * dim, i); |
| 106 | + } |
| 107 | + |
| 108 | + // Create a filter that allows only even labels (divisor 2) |
| 109 | + PickDivisibleIds pickIdsDivisibleByTwo(2); |
| 110 | + |
| 111 | + // Query each element for its k nearest neighbors with the filter applied and report any odd label returned |
| 112 | + int k = 10; |
| 113 | + for (int i = 0; i < max_elements; i++) { |
| 114 | + std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo); |
| 115 | + for (auto item: result) { |
| 116 | + if (item.second % 2 == 1) std::cout << "Error: found odd label\n"; |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + delete[] data; |
| 121 | + delete alg_hnsw; |
| 122 | + return 0; |
| 123 | +} |
| 124 | +``` |
| 125 | + |
| 126 | +An example of reusing the memory of deleted elements when new elements are added (via the `allow_replace_deleted` flag): |
| 127 | +```cpp |
| 128 | +#include "../../hnswlib/hnswlib.h" |
| 129 | + |
| 130 | + |
| 131 | +int main() { |
| 132 | + int dim = 16; // Dimension of each element (floats per vector) |
| 133 | + int max_elements = 10000; // Maximum number of elements; must be known beforehand |
| 134 | + int M = 16; // Tightly connected with the internal dimensionality of the data; |
| 135 | + // strongly affects memory consumption |
| 136 | + int ef_construction = 200; // Controls the index search speed / build speed tradeoff |
| 137 | + |
| 138 | + // Initialize the index; the final constructor argument (true) presumably enables allow_replace_deleted -- confirm against the hnswlib constructor |
| 139 | + hnswlib::L2Space space(dim); |
| 140 | + hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, 100, true); |
| 141 | + |
| 142 | + // Generate random data: max_elements vectors of dim floats, RNG seeded with 47 |
| 143 | + std::mt19937 rng; |
| 144 | + rng.seed(47); |
| 145 | + std::uniform_real_distribution<> distrib_real; |
| 146 | + float* data = new float[dim * max_elements]; |
| 147 | + for (int i = 0; i < dim * max_elements; i++) { |
| 148 | + data[i] = distrib_real(rng); |
| 149 | + } |
| 150 | + |
| 151 | + // Add every vector to the index, using its position i as the label |
| 152 | + for (int i = 0; i < max_elements; i++) { |
| 153 | + alg_hnsw->addPoint(data + i * dim, i); |
| 154 | + } |
| 155 | + |
| 156 | + // Mark the first half of the elements (labels 0 .. num_deleted - 1) as deleted |
| 157 | + int num_deleted = max_elements / 2; |
| 158 | + for (int i = 0; i < num_deleted; i++) { |
| 159 | + alg_hnsw->markDelete(i); |
| 160 | + } |
| 161 | + |
| 162 | + float* add_data = new float[dim * num_deleted]; |
| 163 | + for (int i = 0; i < dim * num_deleted; i++) { |
| 164 | + add_data[i] = distrib_real(rng); |
| 165 | + } |
| 166 | + |
| 167 | + // Replace the deleted elements with new ones. |
| 168 | + // The maximum number of elements is reached, so we cannot add new items, |
| 169 | + // but we can replace the deleted ones by passing replace_deleted=true (third addPoint argument) |
| 170 | + for (int i = 0; i < num_deleted; i++) { |
| 171 | + int label = max_elements + i; |
| 172 | + alg_hnsw->addPoint(add_data + i * dim, label, true); |
| 173 | + } |
| 174 | + |
| 175 | + delete[] data; |
| 176 | + delete[] add_data; |
| 177 | + delete alg_hnsw; |
| 178 | + return 0; |
| 179 | +} |
| 180 | +``` |
| 181 | + |
| 182 | +Multithreaded examples: |
| 183 | +* Creating an index, inserting elements, and searching: [example_mt_search.cpp](example_mt_search.cpp) |
| 184 | +* Filtering during the search with a boolean function: [example_mt_filter.cpp](example_mt_filter.cpp) |
| 185 | +* Reusing the memory of deleted elements when new elements are added: [example_mt_replace_deleted.cpp](example_mt_replace_deleted.cpp) |
0 commit comments