From f38e37973c513d1748660275ccfbf261734fb977 Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Mon, 13 Jan 2025 19:13:27 +0530 Subject: [PATCH 1/5] Remove restriction of caching only 10 % nodes in disk search --- src/pq_flash_index.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index d9ad50617..f03c6514f 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -355,13 +355,12 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: tsl::robin_set node_set; - // Do not cache more than 10% of the nodes in the index - uint64_t tenp_nodes = (uint64_t)(std::round(this->_num_points * 0.1)); - if (num_nodes_to_cache > tenp_nodes) + // If num_nodes_to_cache is more than total_nodes then reduce num_nodes_to_cache to total_nodes. + if (num_nodes_to_cache > this->_num_points) { - diskann::cout << "Reducing nodes to cache from: " << num_nodes_to_cache << " to: " << tenp_nodes - << "(10 percent of total nodes:" << this->_num_points << ")" << std::endl; - num_nodes_to_cache = tenp_nodes == 0 ? 1 : tenp_nodes; + diskann::cout << "Reducing nodes to cache from: " << num_nodes_to_cache + << " to total nodes:" << this->_num_points << std::endl; + num_nodes_to_cache = this->_num_points; } diskann::cout << "Caching " << num_nodes_to_cache << "..." << std::endl; From ce56e0c6ff270507661a0458423fb3860226d30e Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Tue, 21 Jan 2025 17:05:19 +0530 Subject: [PATCH 2/5] Account for initialisation and PQ training in latency calculation for disk search. 
--- src/pq_flash_index.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index d9ad50617..2b93ebe35 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -1276,7 +1276,7 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t if (beam_width > num_sector_per_nodes * defaults::MAX_N_SECTOR_READS) throw ANNException("Beamwidth can not be higher than defaults::MAX_N_SECTOR_READS", -1, __FUNCSIG__, __FILE__, __LINE__); - + Timer query_timer; ScratchStoreManager> manager(this->_thread_data); auto data = manager.scratch_space(); IOContext &ctx = data->ctx; @@ -1350,7 +1350,7 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t diskann::aggregate_coords(ids, n_ids, this->data, this->_n_chunks, pq_coord_scratch); diskann::pq_dist_lookup(pq_coord_scratch, n_ids, this->_n_chunks, pq_dists, dists_out); }; - Timer query_timer, io_timer, cpu_timer; + Timer io_timer, cpu_timer; tsl::robin_set &visited = query_scratch->visited; NeighborPriorityQueue &retset = query_scratch->retset; From b7d1520b10d27794473154b513e9eec577acbc59 Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Fri, 24 Jan 2025 19:29:54 +0530 Subject: [PATCH 3/5] Add PQ Computation in cpu_timer --- src/pq_flash_index.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index 2b93ebe35..4c6b988e2 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -1276,7 +1276,7 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t if (beam_width > num_sector_per_nodes * defaults::MAX_N_SECTOR_READS) throw ANNException("Beamwidth can not be higher than defaults::MAX_N_SECTOR_READS", -1, __FUNCSIG__, __FILE__, __LINE__); - Timer query_timer; + Timer query_timer, io_timer, cpu_timer; ScratchStoreManager> manager(this->_thread_data); auto data = manager.scratch_space(); IOContext &ctx = data->ctx; @@ 
-1334,11 +1334,16 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t const uint64_t num_sectors_per_node = _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); + cpu_timer.reset(); // query <-> PQ chunk centers distances _pq_table.preprocess_query(query_rotated); // center the query and rotate if // we have a rotation matrix float *pq_dists = pq_query_scratch->aligned_pqtable_dist_scratch; _pq_table.populate_chunk_distances(query_rotated, pq_dists); + if (stats != nullptr) + { + stats->cpu_us += (float)cpu_timer.elapsed(); + } // query <-> neighbor list float *dist_scratch = pq_query_scratch->aligned_dist_scratch; @@ -1350,7 +1355,6 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t diskann::aggregate_coords(ids, n_ids, this->data, this->_n_chunks, pq_coord_scratch); diskann::pq_dist_lookup(pq_coord_scratch, n_ids, this->_n_chunks, pq_dists, dists_out); }; - Timer io_timer, cpu_timer; tsl::robin_set &visited = query_scratch->visited; NeighborPriorityQueue &retset = query_scratch->retset; From 68e7bb65720716392252d7c78dc1a2ce7bd4b62a Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Fri, 24 Jan 2025 15:00:26 +0000 Subject: [PATCH 4/5] Run clang-format on the repo --- python/include/builder.h | 11 +++++------ python/include/dynamic_memory_index.h | 18 ++++++++---------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/python/include/builder.h b/python/include/builder.h index 6b1a5b4f3..06102ac62 100644 --- a/python/include/builder.h +++ b/python/include/builder.h @@ -18,10 +18,9 @@ void build_disk_index(diskann::Metric metric, const std::string &data_file_path, template void build_memory_index(diskann::Metric metric, const std::string &vector_bin_path, - const std::string &index_output_path, uint32_t graph_degree, uint32_t complexity, - float alpha, uint32_t num_threads, bool use_pq_build, - size_t num_pq_bytes, bool use_opq, bool use_tags = false, - const std::string& 
filter_labels_file = "", const std::string& universal_label = "", - uint32_t filter_complexity = 0); + const std::string &index_output_path, uint32_t graph_degree, uint32_t complexity, float alpha, + uint32_t num_threads, bool use_pq_build, size_t num_pq_bytes, bool use_opq, + bool use_tags = false, const std::string &filter_labels_file = "", + const std::string &universal_label = "", uint32_t filter_complexity = 0); -} +} // namespace diskannpy diff --git a/python/include/dynamic_memory_index.h b/python/include/dynamic_memory_index.h index 02d6b8cce..6cbe15ced 100644 --- a/python/include/dynamic_memory_index.h +++ b/python/include/dynamic_memory_index.h @@ -18,8 +18,7 @@ namespace py = pybind11; namespace diskannpy { -template -class DynamicMemoryIndex +template class DynamicMemoryIndex { public: DynamicMemoryIndex(diskann::Metric m, size_t dimensions, size_t max_vectors, uint32_t complexity, @@ -31,19 +30,18 @@ class DynamicMemoryIndex void load(const std::string &index_path); int insert(const py::array_t &vector, DynamicIdType id); py::array_t batch_insert(py::array_t &vectors, - py::array_t &ids, int32_t num_inserts, - int num_threads = 0); + py::array_t &ids, + int32_t num_inserts, int num_threads = 0); int mark_deleted(DynamicIdType id); void save(const std::string &save_path, bool compact_before_save = false); - NeighborsAndDistances search(py::array_t &query, uint64_t knn, - uint64_t complexity); - NeighborsAndDistances batch_search(py::array_t &queries, - uint64_t num_queries, uint64_t knn, uint64_t complexity, - uint32_t num_threads); + NeighborsAndDistances search(py::array_t &query, + uint64_t knn, uint64_t complexity); + NeighborsAndDistances batch_search( + py::array_t &queries, uint64_t num_queries, uint64_t knn, + uint64_t complexity, uint32_t num_threads); void consolidate_delete(); size_t num_points(); - private: const uint32_t _initial_search_complexity; const diskann::IndexWriteParameters _write_parameters; From 
88743fd69c0a9257ab427af173f0270c02fe5c25 Mon Sep 17 00:00:00 2001 From: Suryansh Gupta Date: Thu, 30 Jan 2025 00:01:35 +0530 Subject: [PATCH 5/5] Log PQ Training time as well. --- apps/search_disk_index.cpp | 6 ++++-- include/percentile_stats.h | 1 + src/pq_flash_index.cpp | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/apps/search_disk_index.cpp b/apps/search_disk_index.cpp index 6b0793db7..bd3aafc2e 100644 --- a/apps/search_disk_index.cpp +++ b/apps/search_disk_index.cpp @@ -179,7 +179,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre std::string recall_string = "Recall@" + std::to_string(recall_at); diskann::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) - << "Mean IO (us)" << std::setw(16) << "CPU (s)"; + << "Mean IO (us)" << std::setw(16) << "CPU (s)" << std::setw(16)<< "PQ Training(s)"; if (calc_recall_flag) { diskann::cout << std::setw(16) << recall_string << std::endl; @@ -272,6 +272,8 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre auto mean_io_us = diskann::get_mean_stats(stats, query_num, [](const diskann::QueryStats &stats) { return stats.io_us; }); + auto mean_pq_training_us = diskann::get_mean_stats(stats, query_num, + [](const diskann::QueryStats &stats) { return stats.pq_training_us; }); double recall = 0; if (calc_recall_flag) @@ -283,7 +285,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre diskann::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios - << std::setw(16) << mean_io_us << std::setw(16) << mean_cpuus; + << std::setw(16) << mean_io_us << std::setw(16) << mean_cpuus << std::setw(16) << mean_pq_training_us; if 
(calc_recall_flag) { diskann::cout << std::setw(16) << recall << std::endl; diff --git a/include/percentile_stats.h b/include/percentile_stats.h index 793257577..fbd740930 100644 --- a/include/percentile_stats.h +++ b/include/percentile_stats.h @@ -23,6 +23,7 @@ struct QueryStats float total_us = 0; // total time to process query in micros float io_us = 0; // total time spent in IO float cpu_us = 0; // total time spent in CPU + float pq_training_us = 0; // total time spent in PQ training unsigned n_4k = 0; // # of 4kB reads unsigned n_8k = 0; // # of 8kB reads diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index 42e006131..c5b83836d 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -1275,7 +1275,7 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t if (beam_width > num_sector_per_nodes * defaults::MAX_N_SECTOR_READS) throw ANNException("Beamwidth can not be higher than defaults::MAX_N_SECTOR_READS", -1, __FUNCSIG__, __FILE__, __LINE__); - Timer query_timer, io_timer, cpu_timer; + Timer query_timer, io_timer, cpu_timer, pq_training_timer; ScratchStoreManager> manager(this->_thread_data); auto data = manager.scratch_space(); IOContext &ctx = data->ctx; @@ -1334,6 +1334,7 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t _nnodes_per_sector > 0 ? 1 : DIV_ROUND_UP(_max_node_len, defaults::SECTOR_LEN); cpu_timer.reset(); + pq_training_timer.reset(); // query <-> PQ chunk centers distances _pq_table.preprocess_query(query_rotated); // center the query and rotate if // we have a rotation matrix @@ -1341,6 +1342,7 @@ void PQFlashIndex::cached_beam_search(const T *query1, const uint64_t _pq_table.populate_chunk_distances(query_rotated, pq_dists); if (stats != nullptr) { + stats->pq_training_us = (float)pq_training_timer.elapsed(); stats->cpu_us += (float)cpu_timer.elapsed(); }